* Session setup: disable output paging and start from an empty workspace.
set more off
clear all

* Section switches: a section below runs only when its flag equals 1.
* Population sections (raw extract, historical extract, merge of the two).
local flag_population 1
local flag_population_hist 1
local flag_population_merge 1

* Labor force section.
local flag_labor_force 1

********************************************************************************
************************OECD POPULATION, 1960-**********************************
********************************************************************************
if `flag_population'==1 {

	* Reads the OECD population extract, reshapes it to one row per
	* country-year with one column per sex/age cell, and saves total
	* population by age plus the male/female shares of each age cell.
	import delimited "original_data/OECD_population/population_data.csv", clear

	rename time year

	drop v8 flagcodes flags v4 v6 country

	* NOTE(review): the code column is imported with a leading "ï"
	* (presumably a byte-order-mark artifact of the CSV - confirm).
	rename ïlocation country
	label variable country "country_code"

	*keep only relevant age statistics
	* (inlist() accepts at most 10 string arguments, hence the three calls)
	keep if inlist(age, "TOTAL", "0_4", "05_9", "10_14", "15_19", "20_24", "25_29") ///
		| inlist(age, "30_34", "35_39", "40_44", "45_49", "50_54", "55_59") ///
		| inlist(age, "60_64", "65_69", "70_74", "75_79", "80_84", "85_OVER")

	* harmonise the 5-9 label with the other age labels
	replace age="5_9" if age=="05_9"

	* build the reshape key, e.g. M_0_4, W_TOTAL
	egen sex_age = concat(sex age), p(_)
	drop sex age

	rename value population

	reshape wide population, i(country year) j(sex_age) string

	preserve
		*check that statistics add up
		* (1) men + women versus the reported both-sexes figure, per age cell
		local list TOTAL 0_4 5_9 10_14 15_19 20_24 25_29 30_34 35_39 40_44 45_49 50_54 55_59 60_64 65_69 70_74 75_79 80_84 85_OVER
		foreach var of local list {
			gen discr_populationT_`var'=(populationM_`var'+populationW_`var'-populationT_`var')/populationT_`var'
		}

		* (2) sum over age cells versus the reported TOTAL, per sex
		local list 0_4 5_9 10_14 15_19 20_24 25_29 30_34 35_39 40_44 45_49 50_54 55_59 60_64 65_69 70_74 75_79 80_84 85_OVER
		foreach s in M W T {
			gen aux_`s'=0
			foreach var of local list {
				replace aux_`s'=aux_`s'+population`s'_`var'
			}
			gen discr_`s'=(aux_`s'-population`s'_TOTAL)/population`s'_TOTAL
		}

		* show the relative discrepancies in the log; restore discards them
		summarize discr_*
	restore


	local list TOTAL 0_4 5_9 10_14 15_19 20_24 25_29 30_34 35_39 40_44 45_49 50_54 55_59 60_64 65_69 70_74 75_79 80_84 85_OVER
	* overwrite the both-sexes 85+ figure for ARG in 2000 with men + women
	replace populationT_85_OVER=populationM_85_OVER+populationW_85_OVER if country=="ARG" & year==2000
	foreach var of local list {
		* male share of the cell; the female share is its complement
		gen frac_populationM_`var'=populationM_`var'/(populationM_`var'+populationW_`var')
		gen frac_populationW_`var'=1-frac_populationM_`var'
	}
	* only totals and sex shares are kept
	drop populationM*
	drop populationW*

	save "original_data/OECD_population/population.dta", replace
}


********************************************************************************
************************OECD POPULATION, 1950-**********************************
********************************************************************************
if `flag_population_hist'==1 {

	* Reads the historical OECD population extract, recodes sex and age
	* labels to the scheme used in the population section, reshapes to one
	* row per country-year, and saves totals by age plus sex shares with a
	* hist_ prefix.
	import delimited "original_data/OECD_population/population_data_hist.csv", clear

	rename time year

	drop country subject v6 frequency v8 v10 unitcode power* reference* flag* unit

	* NOTE(review): the code column is imported with a leading "ï"
	* (presumably a byte-order-mark artifact of the CSV - confirm).
	rename ïlocation country
	label variable country "country_code"

	* recode sex to the single-letter codes M / W / T
	replace sex="W" if sex=="FE"
	replace sex="M" if sex=="MA"
	replace sex="T" if sex=="TT"

	rename v4 age

	* recode the descriptive age labels to short codes
	replace age="TOTAL" if age=="Population (hist5) All ages"
	replace age="0_4" if age=="Population (hist5)  00-04"
	replace age="5_9" if age=="Population (hist5)  05-09"
	replace age="10_14" if age=="Population (hist5)  10-14"
	replace age="15_19" if age=="Population (hist5)  15-19"
	replace age="20_24" if age=="Population (hist5)  20-24"
	replace age="25_29" if age=="Population (hist5)  25-29"
	replace age="30_34" if age=="Population (hist5)  30-34"
	replace age="35_39" if age=="Population (hist5)  35-39"
	replace age="40_44" if age=="Population (hist5)  40-44"
	replace age="45_49" if age=="Population (hist5)  45-49"
	replace age="50_54" if age=="Population (hist5)  50-54"
	replace age="55_59" if age=="Population (hist5)  55-59"
	replace age="60_64" if age=="Population (hist5)  60-64"
	replace age="65_69" if age=="Population (hist5)  65-69"
	replace age="70_74" if age=="Population (hist5)  70-74"
	replace age="75_79" if age=="Population (hist5)  75-79"
	replace age="80_84" if age=="Population (hist5)  80-84"
	replace age="85_OVER" if age=="Population (hist5)  85+"	

	*keep only relevant age statistics
	* (inlist() accepts at most 10 string arguments, hence the three calls)
	keep if inlist(age, "TOTAL", "0_4", "5_9", "10_14", "15_19", "20_24", "25_29") ///
		| inlist(age, "30_34", "35_39", "40_44", "45_49", "50_54", "55_59") ///
		| inlist(age, "60_64", "65_69", "70_74", "75_79", "80_84", "85_OVER")

	* build the reshape key, e.g. M_0_4, W_TOTAL
	egen sex_age = concat(sex age), p(_)
	drop sex age

	rename value hist_population

	reshape wide hist_population, i(country year) j(sex_age) string

	preserve
		*check that statistics add up
		* (1) men + women versus the reported both-sexes figure, per age cell
		local list TOTAL 0_4 5_9 10_14 15_19 20_24 25_29 30_34 35_39 40_44 45_49 50_54 55_59 60_64 65_69 70_74 75_79 80_84 85_OVER
		foreach var of local list {
			gen discr_populationT_`var'=(hist_populationM_`var'+hist_populationW_`var'-hist_populationT_`var')/hist_populationT_`var'
		}

		* (2) sum over age cells versus the reported TOTAL, per sex
		local list 0_4 5_9 10_14 15_19 20_24 25_29 30_34 35_39 40_44 45_49 50_54 55_59 60_64 65_69 70_74 75_79 80_84 85_OVER
		foreach s in M W T {
			gen aux_`s'=0
			foreach var of local list {
				replace aux_`s'=aux_`s'+hist_population`s'_`var'
			}
			gen discr_`s'=(aux_`s'-hist_population`s'_TOTAL)/hist_population`s'_TOTAL
		}

		* show the relative discrepancies in the log; restore discards them
		summarize discr_*
	restore

	local list TOTAL 0_4 5_9 10_14 15_19 20_24 25_29 30_34 35_39 40_44 45_49 50_54 55_59 60_64 65_69 70_74 75_79 80_84 85_OVER
	foreach var of local list {
		* male share of the cell
		gen hist_frac_populationM_`var'=hist_populationM_`var'/(hist_populationM_`var'+hist_populationW_`var')
		* a missing male share for ISR is set to 0, so its female share becomes 1
		replace hist_frac_populationM_`var'=0 if (hist_frac_populationM_`var'==. & country=="ISR") /*Israel has some issues in the early sample*/
		gen hist_frac_populationW_`var'=1-hist_frac_populationM_`var'
	}
	* only totals and sex shares are kept
	drop hist_populationM*
	drop hist_populationW*

	save "original_data/OECD_population/hist_population.dta", replace
}

********************************************************************************
************************OECD POPULATION, MERGING********************************
********************************************************************************
if `flag_population_merge'==1 {

	* Merges population.dta with hist_population.dta by country-year and
	* builds one spliced series per country: a total-population level
	* chained through year-on-year growth rates, age-cell levels obtained
	* from age shares, and male/female shares per age cell. Wherever both
	* sources are available the non-historical one is used.
	use "original_data/OECD_population/population.dta", clear
	merge 1:1 year country using "original_data/OECD_population/hist_population.dta"
	sort country year
	drop _merge

	* drop the listed codes (presumably country groupings rather than
	* single countries - confirm against the OECD code list)
	drop if inlist(country, "EA19", "EU28", "G-7", "G20", "OECD", "WLD")

	*drop observations for which population by age is not available
	drop if (hist_populationT_TOTAL!=. & hist_frac_populationM_TOTAL==. & frac_populationM_TOTAL==.)

	* first year observed for each country (data are sorted by country year)
	by country: egen min_year=min(year)

	* age shares of the both-sexes population, separately for each source;
	* the denominator is the sum over age cells, not the reported TOTAL
	local list 0_4 5_9 10_14 15_19 20_24 25_29 30_34 35_39 40_44 45_49 50_54 55_59 60_64 65_69 70_74 75_79 80_84 85_OVER
	gen aux_hist=0
	gen aux=0
	foreach var of local list {
		replace aux=aux+populationT_`var'
		replace aux_hist=aux_hist+hist_populationT_`var'
	}
	foreach var of local list {
		gen frac_`var'=populationT_`var'/aux
		gen hist_frac_`var'=hist_populationT_`var'/aux_hist
	}


	local list_inner populationT frac_populationM frac_populationW

	* level in each country's first year: non-historical total if present,
	* otherwise the historical total
	gen fin_populationT_TOTAL=0
	replace fin_populationT_TOTAL=populationT_TOTAL if year==min_year & populationT_TOTAL!=.
	replace fin_populationT_TOTAL=hist_populationT_TOTAL if year==min_year & populationT_TOTAL==.

	* later years: previous spliced level times the growth factor of the
	* historical series when the previous observation has no non-historical
	* total, and of the non-historical series otherwise. replace runs in
	* observation order, so [_n-1] already holds the updated value.
	by country: replace fin_populationT_TOTAL=fin_populationT_TOTAL[_n-1]*hist_populationT_TOTAL/hist_populationT_TOTAL[_n-1] if populationT_TOTAL[_n-1]==. & _n>1 
	by country: replace fin_populationT_TOTAL=fin_populationT_TOTAL[_n-1]*populationT_TOTAL/populationT_TOTAL[_n-1] if populationT_TOTAL[_n-1]!=. & _n>1 		

	* age-cell levels: spliced total times the age share, falling back to
	* the historical share when the non-historical one is missing
	foreach var of local list {
		gen fin_populationT_`var'=cond(frac_`var'!=., frac_`var', hist_frac_`var')*fin_populationT_TOTAL
	}

	* sex shares per age cell, with the same fallback rule
	foreach var of local list {
		gen fin_frac_populationM_`var'=cond(frac_populationM_`var'!=., frac_populationM_`var', hist_frac_populationM_`var')
		gen fin_frac_populationW_`var'=cond(frac_populationW_`var'!=., frac_populationW_`var', hist_frac_populationW_`var')
	}


	* keep only the spliced variables and strip their fin_ prefix
	keep year country fin*
	foreach var of local list {
		foreach var_inner of local list_inner{
			rename fin_`var_inner'_`var' `var_inner'_`var'
		}
	}
	rename fin_populationT_TOTAL populationT_TOTAL


	*for Cyprus, keep only post-1982 observations (jump in the data)
	drop if (year<1982 & country=="CYP")

	save population_final.dta, replace
}

********************************************************************************
*********************LABOR FORCE BY AGE AND SEX*********************************
********************************************************************************
if `flag_labor_force'==1 {

	* Reads the OECD labor force extract, recodes sex / series / age labels,
	* reshapes to one row per country-year with one column per
	* series-sex-age cell, recodes zeros to missing, drops the listed
	* aggregates and early years, and saves labor_force.dta.
	import delimited "original_data/OECD_labor_force/labor_force_data.csv", clear

	rename time year

	* recode sex to the single-letter codes M / W / T
	replace sex="W" if sex=="WOMEN"
	replace sex="M" if sex=="MEN"
	replace sex="T" if sex=="MW"

	* the original age column is dropped here; v6 takes over the name below
	drop country v4 age frequency v10 v12 power* reference* flag* v8 unit*

	rename v6 age

	* NOTE(review): the code column is imported with a leading "ï"
	* (presumably a byte-order-mark artifact of the CSV - confirm).
	rename ïcountry country
	label variable country "country_code"

	* series codes -> readable prefixes used in the reshaped variable names
	replace series="Emp" if series=="E"
	replace series="LF" if series=="L"
	replace series="Pop" if series=="P"
	replace series="Unemp" if series=="U"

	replace age="TOTAL" if age=="Total"

	* descriptive age labels -> short codes
	replace age="15_24" if age=="15 to 24"
	replace age="20_24" if age=="20 to 24"
	replace age="25_34" if age=="25 to 34"
	replace age="25_64" if age=="25 to 64"
	replace age="35_44" if age=="35 to 44"
	replace age="45_54" if age=="45 to 54"
	replace age="55_64" if age=="55 to 64"
	replace age="65_69" if age=="65 to 69"
	replace age="70_74" if age=="70 to 74"
	replace age="75_OVER" if age=="75+"	

	*keep only relevant age statistics
	* (inlist() accepts at most 10 string arguments, hence the two calls)
	keep if inlist(age, "TOTAL", "20_24", "15_24", "25_34", "25_64", "35_44") ///
		| inlist(age, "45_54", "55_64", "65_69", "70_74", "75_OVER")

	* build the reshape key, e.g. Emp_T_TOTAL, LF_W_25_34
	egen series_sex_age=concat(series sex age), p(_)
	drop series sex age

	rename value number

	reshape wide number, i(country year) j(series_sex_age) string

	sort country year


	preserve
		*check that statistics add up
		* employed + unemployed versus labor force, per sex and age cell
		local list TOTAL 15_24 25_34 25_64 35_44 45_54 55_64
		local list_inner T M W
		foreach var of local list {
			foreach var_inner of local list_inner {
				gen discr_LF_`var_inner'_`var'=(numberEmp_`var_inner'_`var'+numberUnemp_`var_inner'_`var'-numberLF_`var_inner'_`var')/numberLF_`var_inner'_`var'
			}
		}

		* show the relative discrepancies in the log; restore discards them
		summarize discr_LF_*
	restore

	* zeros are recoded to missing in every reshaped series
	foreach v of var number* {
		replace `v'=. if `v'==0
	}

	* drop aggregates; OCE = oceania, NAM = north america, FTFR = west germany
	drop if inlist(country, "EU16", "EU22", "EU28", "EUR", "G7")
	drop if inlist(country, "OECD", "OCE", "NAM", "FTFR")

	*for Germany, keep only post-1991 observations (for unified country only)
	drop if (year<1991 & country=="DEU")
	*for Croatia, pre 2007 data does not make sense and does not coincide with TED
	drop if (year<2007 & country=="HRV")

	save "labor_force.dta", replace


}
